import os

from pyhanlp import *

# Train a CRF named-entity recognizer with HanLP, then run it on a sample
# sentence through a full lexical-analysis pipeline
# (segmentation -> POS tagging -> NER).

# Training and test corpora.
train_file = "199801-train.txt"
# NOTE(review): test_file is defined but nothing below reads it yet;
# kept for a future evaluation step.
test_file = "199801-test.txt"
# Destination of the trained model (same path the script always used).
model_file = "model/crf-ner-model.bin"

# Java classes used below, resolved explicitly through JPype.
CRFNERecognizer = JClass('com.hankcs.hanlp.model.crf.CRFNERecognizer')
AbstractLexicalAnalyzer = JClass(
    'com.hankcs.hanlp.tokenizer.lexical.AbstractLexicalAnalyzer')
PerceptronSegmenter = JClass(
    'com.hankcs.hanlp.model.perceptron.PerceptronSegmenter')
PerceptronPOSTagger = JClass(
    'com.hankcs.hanlp.model.perceptron.PerceptronPOSTagger')

# Register a domain term in the custom dictionary.
CustomDictionary.add("智能科学与技术")

# Make sure the output directory exists before the trainer writes into it.
model_dir = os.path.dirname(model_file)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

# Train the CRF model.
# Passing None asks for a blank recognizer; per the HanLP demos the
# zero-argument constructor would load the bundled default model instead.
trainer = CRFNERecognizer(None)
trainer.train(train_file, model_file)

# Test the model.
# An NER model has to be loaded back into a CRFNERecognizer (via its
# constructor), not into a word segmenter. The recognizer consumes words and
# POS tags, so it is combined with a segmenter and a POS tagger.
recognizer = CRFNERecognizer(model_file)
analyzer = AbstractLexicalAnalyzer(
    PerceptronSegmenter(), PerceptronPOSTagger(), recognizer)
result = analyzer.analyze("这是一段包含专业术语的文本")
print(result)